import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.patches as mpatches
from matplotlib import ticker
from plotly.subplots import make_subplots
from wordcloud import WordCloud
import matplotlib.dates as md
import seaborn as sns
# Plot theme and display options, then load the vaccination data set.
sns.set(style="whitegrid", color_codes=True)
pd.options.display.max_columns = None  # print every column, never elide with "..."

df = pd.read_csv("country_vaccinations.csv")
print(df.head(n=5))
country iso_code date total_vaccinations people_vaccinated \
0 Afghanistan AFG 2021-02-22 0.0 0.0
1 Afghanistan AFG 2021-02-23 NaN NaN
2 Afghanistan AFG 2021-02-24 NaN NaN
3 Afghanistan AFG 2021-02-25 NaN NaN
4 Afghanistan AFG 2021-02-26 NaN NaN
people_fully_vaccinated daily_vaccinations_raw daily_vaccinations \
0 NaN NaN NaN
1 NaN NaN 1367.0
2 NaN NaN 1367.0
3 NaN NaN 1367.0
4 NaN NaN 1367.0
total_vaccinations_per_hundred people_vaccinated_per_hundred \
0 0.0 0.0
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
people_fully_vaccinated_per_hundred daily_vaccinations_per_million \
0 NaN NaN
1 NaN 35.0
2 NaN 35.0
3 NaN 35.0
4 NaN 35.0
vaccines \
0 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
1 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
2 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
3 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
4 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
source_name source_website
0 World Health Organization https://covid19.who.int/
1 World Health Organization https://covid19.who.int/
2 World Health Organization https://covid19.who.int/
3 World Health Organization https://covid19.who.int/
4 World Health Organization https://covid19.who.int/
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21628 entries, 0 to 21627 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 country 21628 non-null object 1 iso_code 21628 non-null object 2 date 21628 non-null object 3 total_vaccinations 12275 non-null float64 4 people_vaccinated 11479 non-null float64 5 people_fully_vaccinated 8967 non-null float64 6 daily_vaccinations_raw 10187 non-null float64 7 daily_vaccinations 21412 non-null float64 8 total_vaccinations_per_hundred 12275 non-null float64 9 people_vaccinated_per_hundred 11479 non-null float64 10 people_fully_vaccinated_per_hundred 8967 non-null float64 11 daily_vaccinations_per_million 21412 non-null float64 12 vaccines 21628 non-null object 13 source_name 21628 non-null object 14 source_website 21628 non-null object dtypes: float64(9), object(6) memory usage: 2.5+ MB None
# Number of missing values in each column.
print(df.isna().sum())
country 0 iso_code 0 date 0 total_vaccinations 9353 people_vaccinated 10149 people_fully_vaccinated 12661 daily_vaccinations_raw 11441 daily_vaccinations 216 total_vaccinations_per_hundred 9353 people_vaccinated_per_hundred 10149 people_fully_vaccinated_per_hundred 12661 daily_vaccinations_per_million 216 vaccines 0 source_name 0 source_website 0 dtype: int64
# Pearson correlation between the numeric columns, drawn as an annotated heatmap.
# The numeric columns are selected explicitly because the frame also contains
# text columns (country, iso_code, date, vaccines, ...): pandas >= 2.0 raises on
# those instead of silently skipping them.
corr = df.select_dtypes(include='number').corr(method='pearson')
plt.figure(figsize=(20, 6))
sns.heatmap(corr, annot=True)
<AxesSubplot:>
# Basic cleaning of the loaded frame.
# Replace every missing value, in every column, with 0.
df.fillna(0, inplace=True)
# A chained `df['iso_code'].fillna('GBR', inplace=True)` used to follow here.
# It could never change anything (no NaN is left after the fill above) and an
# in-place fill on a column selection triggers pandas' chained-assignment
# warning, so it has been dropped.
# NOTE(review): if rows without an ISO code were meant to be labelled 'GBR'
# instead of being removed below, that fill must run BEFORE fillna(0) - confirm.
# Rows whose iso_code was missing now hold 0; remove them.
df.drop(df.index[df['iso_code'] == 0], inplace=True)
# The data-source columns are not used by any of the charts below.
df.drop(columns=["source_name", "source_website"], inplace=True)
print(df.head())
country iso_code date total_vaccinations people_vaccinated \
0 Afghanistan AFG 2021-02-22 0.0 0.0
1 Afghanistan AFG 2021-02-23 0.0 0.0
2 Afghanistan AFG 2021-02-24 0.0 0.0
3 Afghanistan AFG 2021-02-25 0.0 0.0
4 Afghanistan AFG 2021-02-26 0.0 0.0
people_fully_vaccinated daily_vaccinations_raw daily_vaccinations \
0 0.0 0.0 0.0
1 0.0 0.0 1367.0
2 0.0 0.0 1367.0
3 0.0 0.0 1367.0
4 0.0 0.0 1367.0
total_vaccinations_per_hundred people_vaccinated_per_hundred \
0 0.0 0.0
1 0.0 0.0
2 0.0 0.0
3 0.0 0.0
4 0.0 0.0
people_fully_vaccinated_per_hundred daily_vaccinations_per_million \
0 0.0 0.0
1 0.0 35.0
2 0.0 35.0
3 0.0 35.0
4 0.0 35.0
vaccines
0 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
1 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
2 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
3 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
4 Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm...
# Function to PlotMap
def plot_map(variable, title, color):
    """Show a world choropleth coloured by each country's latest value of a column.

    Reads the module-level ``df``. For every country the value used is the one
    in that country's last row, in the frame's current row order.

    Args:
        variable: Name of the ``df`` column that colours the map.
        title: Title text, centred above the map.
        color: Value forwarded to plotly as ``color_continuous_scale``;
            ``None`` leaves plotly's default colouring in place.

    Returns:
        Whatever ``Figure.show()`` returns.
    """
    # One grouped pass instead of rescanning every row for every country.
    # .iloc[-1] is positional, so it still works when the index has gaps
    # (rows dropped during cleaning); the old label lookup df.country[i] did not.
    latest_by_country = {
        country: values.iloc[-1]
        for country, values in df.groupby('country', sort=False)[variable]
    }
    new_df = pd.DataFrame.from_dict(latest_by_country, orient='index',
                                    columns=[variable])

    choropleth_args = dict(
        locations=new_df.index,
        locationmode='country names',
        color=variable,
        hover_name=new_df.index,
    )
    # Only pass a colour scale when the caller asked for one.
    if color is not None:
        choropleth_args['color_continuous_scale'] = color
    place_map = px.choropleth(new_df, **choropleth_args)

    place_map.update_layout(
        title_text=title,
        title_x=0.5,
        geo=dict(showocean=True, oceancolor="#7af8ff",
                 showland=True, landcolor="white",
                 showframe=False))
    return place_map.show()
#Function to PlotPie
def plot_pie(value, title, color):
    """Show a pie chart of how many rows of ``df`` hold each distinct value.

    Reads the module-level ``df``. Slices are sized by row count, one slice per
    distinct non-missing value of the column, in order of first appearance.

    Args:
        value: Name of the ``df`` column whose values are counted.
        title: Chart title.
        color: ``'plasma'`` or ``'rainbow'`` selects the matching plotly
            sequential palette; any other value keeps plotly's default colours.

    Returns:
        Whatever ``Figure.show()`` returns.
    """
    # Single grouped count instead of one full scan of the frame per distinct
    # value. sort=False keeps first-appearance order; rename_axis(None) keeps
    # the index unnamed, as the dict-built frame was.
    counts = df.groupby(value, sort=False).size()
    new_df = counts.rename_axis(None).to_frame('Total')

    if color == 'plasma':
        palette = px.colors.sequential.Plasma
    elif color == 'rainbow':
        palette = px.colors.sequential.Rainbow
    else:
        palette = None

    pie_args = dict(values='Total', names=new_df.index, title=title)
    if palette is not None:
        pie_args['color_discrete_sequence'] = palette
    fig = px.pie(new_df, **pie_args)

    # Centre the title near the top of the figure.
    fig.update_layout(title={'y': 0.95, 'x': 0.5})
    return fig.show()
# Which vaccines are used, and in which countries?
plot_map(variable='vaccines', title='Vaccines Used in Different countries', color=None)
# Which vaccine combination appears most often in the data?
plot_pie(value='vaccines', title='Various vaccines and their uses', color='plasma')
# Bar chart of the 50 countries with the highest total vaccinations per hundred
# people (each country's maximum over all of its rows).
plt.figure(figsize=(20, 6))
data = (
    df.groupby('country')['total_vaccinations_per_hundred']
    .max()
    .sort_values(ascending=False)
    .head(50)
)
plt.bar(data.index, data)
# rotation must be a number (or 'vertical' / 'horizontal'); the string '90'
# is rejected by current matplotlib.
plt.xticks(rotation=90)
plt.yticks(np.arange(10.0, 160.0, step=20.0))
plt.title('Country vise Total Vaccination per Hundred', fontsize=24, fontweight='bold')
plt.ylabel('total vaccination per hundred')
plt.show()
# Which country has vaccinated the largest share of its population?
# In the bar chart above, Gibraltar has the most vaccinations per hundred people, possibly because of its small population (33,701 in 2019).
# India: vaccination trends over time.
# The stray `import ax as ax` / `import np as np` lines that used to open this
# section are removed: nothing here uses `ax`, and `np` is already bound to
# numpy by the imports at the top of the file, so `import np` would either
# fail or rebind `np` to an unrelated module.
df_India = df[df["iso_code"] == 'IND'].copy()
# Parse only the India rows (previously the whole frame's date column was
# parsed and then aligned back onto the subset).
df_India['date'] = pd.to_datetime(df_India['date'], format='%Y-%m-%d')

# People vaccinated per hundred inhabitants.
plt.figure(figsize=(20, 7))
sns.lineplot(data=df_India, x="date", y="people_vaccinated_per_hundred",
             marker='d', markersize=12, color='k')
plt.title("India's daily vaccinations population trend")
plt.xticks(rotation=45)
plt.show()

# Daily vaccinations per million inhabitants.
plt.figure(figsize=(20, 7))
sns.lineplot(data=df_India, x="date", y="daily_vaccinations_per_million", marker='o')
plt.title("India's daily vaccinations population trend")
plt.show()  # was a bare `plt.show`, which references the function without calling it
<function matplotlib.pyplot.show(close=None, block=None)>
# People vaccinated in India, plotted against date.
plt.figure(figsize=(20, 7))
sns.lineplot(x="date", y="people_vaccinated", data=df_India, marker='o')
plt.show()
# Draw the same three trend charts for every ISO code present in the data.
iso_code = df.iso_code.unique()
for row in iso_code:
    df_county = df[df["iso_code"] == row].copy()
    # Parse only this subset's dates; previously the entire frame's date column
    # was re-parsed on every iteration of the loop.
    df_county['date'] = pd.to_datetime(df_county['date'], format='%Y-%m-%d')

    # People vaccinated per hundred inhabitants.
    plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="people_vaccinated_per_hundred",
                 marker='d', markersize=12, color='k')
    plt.title('daily vaccinations population trend = %s' % (row))
    plt.xticks(rotation=45)
    plt.show()

    # Daily vaccinations per million inhabitants.
    plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="daily_vaccinations_per_million", marker='o')
    plt.title('daily vaccinations population trend = %s' % (row))
    plt.show()

    # People vaccinated.
    plt.figure(figsize=(20, 7))
    sns.lineplot(data=df_county, x="date", y="people_vaccinated", marker='o')
    plt.show()